// Package dependencies include (but not necessarily limited to):
// reghdfe

version 18
clear all
set more off
set matsize 2000

// Set your root directory
local root "/Set/Root/Here"
cd "`root'/stata/FARS"

**************************************************************************
* The .do file does everything from bringing in raw data to final analysis
* The basic ordering is:
* (1) bring in FARS data, create tract by month-of-sample dataset
* (2) bring in company data, create tract by month-of-sample dataset
* (3) merge 1:1 and create event study figures
* As explained below, the company data must be brought in manually
* This requires you to go in and open do file again, no big deal
**************************************************************************

*------------------------------------------------------*
*------DEFINE MATA FUNCTIONS FOR TRACT DISTANCES-------*
*------------------------------------------------------*

clear mata
mata

mata set matastrict off // Sometimes necessary to get the code to execute correctly

// This function computes distance-weighted averages of a variable (xvar) using tracts within a certain distance (cutoff)  
function weight_avg(real scalar cutoff, real scalar ownweight, real scalar poly_order, string scalar tousevar, string scalar lat, string scalar lon, string scalar xvar, string scalar x_weighted_var)
{
	// We type st_view(X=.,.,.) instead of st_view(X,.,.) because X has not yet been defined. If we first ran X=., then we could type st_view(X,.,.)
	st_view(latv_in=., ., lat, tousevar)
	st_view(lonv_in=., ., lon, tousevar)
	st_view(latv_out=., ., lat, tousevar)
	st_view(lonv_out=., ., lon, tousevar)
	st_view(X=., ., xvar, tousevar)
	st_view(X_weight=., ., x_weighted_var, tousevar)
	rowcount = rows( latv_in )
	for (i = 1; i <= rowcount; i++) {
		dist = ( ( latv_in :- latv_out[i] ) :^ 2 + ( lonv_in :- lonv_out[i] ) :^ 2) :^ 0.5
		dist[i] = 1
		weight = ( dist :< cutoff ) :/ ( 5 * dist :^ poly_order )
		weight[i] = ownweight
		X_weight[i] = X' * weight
	}
}

// This function matches a lat/lon point to the nearest Census tract population center   
function match_tract(real scalar accident_obs, string scalar tousevar_input, string scalar tousevar_output, string scalar lat, string scalar lon, string scalar xvar, string scalar x_match_var, string scalar x_match_dist)
{
	// We type st_view(X=.,.,.) instead of st_view(X,.,.) because X has not yet been defined.  If we first ran X=., then we could type st_view(X,.,.)
	st_view(latv_in=., ., lat, tousevar_input)
	st_view(lonv_in=., ., lon, tousevar_input)
	st_view(latv_out=., ., lat, tousevar_output)
	st_view(lonv_out=., ., lon, tousevar_output)
	st_view(X=., ., xvar, tousevar_input)
	st_view(X_match=., ., x_match_var, tousevar_output)
	st_view(X_match_dist=., ., x_match_dist, tousevar_output)
	for (i=1; i <= accident_obs; i++) {
		dist = ( ( latv_in :- latv_out[i] ) :^ 2 + ( lonv_in :- lonv_out[i] ) :^ 2 ) :^ 0.5
		weight = dist :== colmin( dist )
		if ( sum( weight ) == 1 ) {
			X_match[i] = X' * weight
		}
		else {
			if ( sum( weight ) <= 3 ) {
				assert( sum( weight ) > 0 )
				rseed(1)
				u = runiform( rows( X ), 1 )
				dist1 = dist + u * 0.001
				weight = dist1 :== colmin( dist1 )
				assert( sum( weight ) == 1 )
				X_match[i] = X' * weight
			}
			else {
				i
				assert( 1 == 2 )
				// Failure at this point implies that an accident was equally close to more than 3 Census tracts (very unlikely, thus implying an error)
			}
		}
		X_match_dist[i] = colmin(dist)
	}
}
end

*-------------------------------------------------------------------------*
*------BRING IN COUNTY POPULATION AND CBSA CODES FROM CENSUS BUREAU-------*
*-------------------------------------------------------------------------*
// https://www.census.gov/data/tables/2017/demo/popest/counties-total.html#par_textimage
// These data provide Census 2010 populations as well as estimated populations by year
// We are using 2013 population, at the beginning of our company data
// We drop a very small number of counties with no match, 10 out of 3000, presumably small or new
insheet using "`root'/raw data/other/co-est2017-alldata.csv", clear
keep state county popestimate2013
drop if county == 0  
rename state statecode
rename county countycode
rename popestimate2013 pop
sort statecode countycode
save "`root'/stata/FARS/intermediate/CensusPop.dta", replace

// https://www.census.gov/geographies/reference-files/time-series/demo/metro-micro/delineation-files.html
// These data provide a mapping from state/county codes to Core-based statistical areas (CBSAs)
// We are using the Jul 2015 CBSA delineation file
import excel "`root'/raw data/Census/CBSA.xls", sheet("List 1") cellrange(A2:L1901) firstrow clear
destring CBSACode MetropolitanDivisionCode CSACode FIPSStateCode FIPSCountyCode, replace
rename FIPSStateCode state
rename FIPSCountyCode county
rename CBSACode CBSAcode
rename CBSATitle CBSA
sort state county
save "`root'/stata/FARS/intermediate/CBSA.dta", replace


*---------------------------------------------*
*------BRING IN ACCIDENT DATA FROM FARS-------*
*---------------------------------------------*
* CREATE A COMPLETE PANEL OF MONTH-BY-YEAR OBSERVATIONS
* We initially downloaded data through 2015 from NHTSA website ftp://ftp.nhtsa.dot.gov/fars/ in Feb 2017 
* For 2015, we simply downloaded as CSV, unzipped, and add the year to the file name
* For earlier years CSV is not available so i downloaded DBF, unzipped, StatTransferred, and added year to file name
* We're using only the `accident' file, but will need the `vehicle' and `person' files for driver age and gender, for example

// Then in December 2018, we downloaded data from 2016 and 2017
// As we had done earlier with 2015, we simply downloaded zip file with all CSV files and unzipped accident, vehicle, and person files

// VE_TOTAL - # of vehicles involved in the crash, this variable didn't exist before 2004
// PERSONS - # of people involved in the crash
// PEDS - # of pedestrians involved in the crash
// NO_LANES - # of lanes of the road the crash occurred on
// SP_LIMIT – The speed limit at the location of the crash
// FATALS - # of people who died as a result of the crash
// DRUNK_DR - # of alcohol influenced drivers involved in the crash. The 2016+ codebook provides conflicting information on this variable. The text describes it as "discontinued" following 2015, but the summary table marks it as continuing with no changes, and it appears in the data files with roughly the same distribution. What is indisputable is that there is a change in coding in 2008, which is described in the codebook and also appears in the raw trends -- from 2001 to 2007 it corresponds to number of alcohol influenced persons, rather than drivers. Ideally we should rederive it from DR_DRINK in the vehicle file or DRINKING in the persons file.

* Prepare Census tract lat-lon data for merges
tempfile temp_tracts
import delimited "`root'/raw data/Census/CenPop2010_Mean_TR.txt", varnames(1) clear
gen double tract_id = statefp * 10^9 + countyfp * 10^6 + tractce
format tract_id %11.0f
rename population tract_population
label var tract_population "2010 Census population"
rename latitude lat
label var lat "latitude"
rename longitude lon
label var lon "longitude"
sort tract_id
save "`temp_tracts'", replace

* Bring in Raw Data
tempfile tempacc tempveh tempper
insheet using "`root'/raw data/FARS/accident2000.csv", clear 
save "`tempacc'"
foreach x of numlist 2001/2017 {
	insheet using "`root'/raw data/FARS/accident`x'.csv", clear 
	append using "`tempacc'"
	save "`tempacc'", replace
}
gen byte highway_acc = 0 if traf_flo != . & year < 2010
replace highway_acc = 1 if ( traf_flo == 2 | traf_flo == 3 | traf_flo == 6 )
replace highway_acc = 1 if ( traf_flo == 5 & year < 2003 )
keep st_case state county city day month year hour day_week fatals drunk_dr ve_total ve_forms persons peds latitude longitud highway_acc
rename longitud longitude
rename state statecode
rename county countycode
rename city citycode
destring, replace
compress
save "`tempacc'", replace

*-------------------------------------------------------*
*------BRING IN VEHICLE AND PERSON DATA FROM FARS-------*
*-------------------------------------------------------*
// Downloaded from NHTSA website ftp://ftp.nhtsa.dot.gov/fars/
// Kept only the vehicle and person files
// Added the year to each file manually
// In most years data are available as .DBF which we converted to CSV using Stattransfer
// st_case uniquely identifies each accident
// st_case veh_no uniquely identifies each vehicle
// st_case veh_no per_no uniquely identified each person

// Bring in raw vehicle data
// Odd string error with mcarr_id, a variable we're not using, so we drop it
insheet using "`root'/raw data/FARS/vehicle2000.csv", clear
gen int year = 2000
save "`tempveh'"
foreach x of numlist 2001/2017 {
	insheet using "`root'/raw data/FARS/vehicle`x'.csv", clear 
	gen int year = `x'
	capture: drop mcarr_id
	append using "`tempveh'"
	save "`tempveh'", replace
}
keep year st_case state veh_no numoccs deaths dr_drink prev_dwi p_crash2 vtrafway

// Label variables
label variable numoccs "Number of Occupants" 
label variable deaths "Number of Fatalities in the Vehicle" 
label variable dr_drink "Indicator variable for whether driver had been drinking" 
label variable prev_dwi "Number of previous DWI convictions within 5 years"
label variable p_crash2 "Critical event that made this crash imminent"
label variable vtrafway "Trafficway Description"

// Keep variable for *any* person having previous DWI
// This is the number of previous DWIs by driver in last 3 years (or 5 years starting 2015)
tab prev_dwi
gen byte previousDWI = 0
replace previousDWI = 1 if prev_dwi >= 1 & prev_dwi <= 9
replace numoccs = 1 if numoccs >= 97
confirm byte variable vtrafway
gen byte highway = 0 if vtrafway != .
replace highway = 1 if ( vtrafway == 2 | vtrafway == 3 | vtrafway == 6 )
collapse (sum) previousDWI numoccs (max) highway, by( year st_case )
sum
compress
sort year st_case
save "`tempveh'", replace

// Bring in person data
insheet using "`root'/raw data/FARS/person2000.csv", clear
gen int year = 2000
save "`tempper'"
foreach x of numlist 2001/2017 {
		insheet using "`root'/raw data/FARS/person`x'.csv", clear 
		gen int year = `x'
		append using "`tempper'"
		save "`tempper'", replace
}
keep year st_case state veh_no per_no age sex per_typ drinking inj_sev

// Label person type, then keep drivers only
label variable per_typ "Person Type" 
label define per_typ 1 "Driver" 2 "Passenger" 3 "Occupant of Parked Motor Vehicle" /// 
                     4 "Occupant of Parked Non-Motor Vehicle" 5 "Pedestrian" 6 "Bicylist" ///
					 7 "Other" 8 "Other" 9 "Other" 10 "Other" 19 "Other"
label value per_typ per_typ
tab per_typ
drop per_typ

// Label other variables
label variable drinking "Indicator for whether alcohol was involved for this person"
replace drinking = 0 if drinking == 8 | drinking == 9 // Assign zero for not reported or unknown
gen byte drinking_over44 = drinking * ( age >= 45 & age < 200 )
gen byte drinking_under45 = drinking * ( age < 45 )

// Keep fatalities only
tab inj_sev
gen byte dead = ( inj_sev == 4 | inj_sev == 6 )
gen byte deadmales = 0
replace deadmales = 1 if sex == 1 & dead
gen byte deadfemales = 0
replace deadfemales = 1 if sex == 2 & dead
gen byte dead10s = 0
replace dead10s = 1 if age < 20 & dead
gen byte dead20s = 0
replace dead20s = 1 if age >= 20 & age <= 29 & dead
gen byte dead30s = 0
replace dead30s = 1 if age >= 30 & age <= 39 & dead
gen byte dead40s = 0
replace dead40s = 1 if age >= 40 & age <= 49 & dead
gen byte dead50s = 0
replace dead50s = 1 if age >= 50 & age <= 59 & dead
gen byte dead60s = 0
replace dead60s = 1 if age >= 60 & age < 200 & dead
gen byte deadover44 = 0
replace deadover44 = 1 if age >= 45 & age < 200 & dead
gen byte deadunder45 = 0
replace deadunder45 = 1 if age < 45 & dead
gen byte deadpeds = 0
replace deadpeds = 1 if veh_no == 0 & dead

// Collapse by crash
collapse (sum) dead* drinking_*, by( year st_case )

// Merge with vehicle data
sum
sort year st_case
merge 1:1 year st_case using "`tempveh'" // Vehicle data previously collapsed to accident level
assert _merge == 3
drop _merge
label var highway "Divided roadway or entrance/exit ramp"

// Merge with FARS accident data
sort year st_case
merge m:1 year st_case using "`tempacc'"
assert _merge != 2 // < 0.1% obs have no match in accident data, presumably typos
keep if _merge == 3
drop _merge
assert highway == . if year < 2010
assert highway_acc == . if year > 2009
replace highway = highway_acc if year < 2010
drop highway_acc
gen byte fatals_singveh = ( fatals - deadpeds ) * ( ve_forms == 1 )
gen byte fatals_multiveh = ( fatals - deadpeds ) * ( ve_forms > 1 & ve_forms != . )
compress

// Drop a tiny <.001% of observations with no county code
drop if countycode == 0 | countycode == 997 | countycode == 999

// Assign accidents to Census tracts
gen byte accident_data = 1
append using "`temp_tracts'", keep( tract_id lat lon statefp countyfp )
replace accident_data = 0 if accident_data == .
gen byte tract_data = 1 - accident_data
// Lat lon quality control follows
replace accident_data = 0 if latitude + longitude == .
// Goodbye Utqiagvik (Barrow), AK!
replace accident_data = 0 if latitude > 65
// Key West, FL can stay
replace accident_data = 0 if latitude < 24
// Eastport, ME can stay
replace accident_data = 0 if longitude > -67
// Nome, AK can stay
replace accident_data = 0 if longitude < -166
replace statecode = statefp if statecode == . & statefp != .
replace countycode = countyfp if countycode == . & countyfp != .
// Special exceptions for AK, CO, and VA due to county border changes
egen state_county = group( statecode countycode ) ///
	if statecode != 2 & statecode != 8 & statecode != 51
sum state_county, meanonly
local end_sc = r(max)
tempvar accident_data_sc tract_data_sc
gen byte `accident_data_sc' = 0
gen byte `tract_data_sc' = 0
gen float tract_match_dist = .
gen double tract_id_accident = .
replace lat = latitude if accident_data
replace lon = longitude if accident_data
forvalues sc = 1(1)`end_sc' {
	quietly replace `accident_data_sc' = accident_data * ( state_county == `sc' )
	quietly replace `tract_data_sc' = tract_data * ( state_county == `sc' )
	sum `accident_data_sc', meanonly
	local accident_obs = r(sum)
	mata: match_tract( `accident_obs', "`tract_data_sc'", "`accident_data_sc'", "lat", "lon", "tract_id", "tract_id_accident", "tract_match_dist")
}
// Run AK, CO, and VA separately (they had some county boundary revisions)
foreach sc of numlist 2 8 51 {
	quietly replace `accident_data_sc' = accident_data * ( statecode == `sc' )
	quietly replace `tract_data_sc' = tract_data * ( statecode == `sc' )
	sum `accident_data_sc', meanonly
	local accident_obs = r(sum)
	mata: match_tract( `accident_obs', "`tract_data_sc'", "`accident_data_sc'", "lat", "lon", "tract_id", "tract_id_accident", "tract_match_dist")
}
// Quality control - drop obs more than one degree (about 60 miles) from the nearest Census tract center. In many cases this occurs because the county and lat-lon coordinates don't match
replace tract_id_accident = . if tract_match_dist > 1
gen double tract_id_merge = tract_id_accident
format tract_id_merge %11.0f
replace tract_id_merge = tract_id if tract_data == 1
keep if tract_id_merge != .
gen byte drunkfatals = 0
replace drunkfatals = fatals if drunk_dr > 0 & drunk_dr != .
gen byte drunkfatals_singveh = 0
replace drunkfatals_singveh = fatals_singveh if drunk_dr > 0 & drunk_dr != .
gen byte drunkfatals_multiveh = 0
replace drunkfatals_multiveh = fatals_multiveh if drunk_dr > 0 & drunk_dr != .
gen byte drunkdeadpeds = 0
replace drunkdeadpeds = deadpeds if drunk_dr > 0 & drunk_dr != .
gen byte drunk_over44 = 0
replace drunk_over44 = deadover44 if deadover44 + drunk_dr != . & drunk_dr > 0
gen byte drunk_under45 = 0
replace drunk_under45 = deadunder45 if deadunder45 + drunk_dr != . & drunk_dr > 0
// For tracts with zero FARS obs, set date to Jan 2001 and fill in zero fatals (2000 lacks lat-lon data)
replace year = 2001 if tract_data == 1
replace month = 1 if tract_data == 1
replace fatals = 0 if tract_data == 1

// Generate fatalities by tract by hour of day and day of week
local depvars "fatals drunkfatals deadpeds drunkdeadpeds drunk_under45 drunk_over44"
foreach depvar of varlist `depvars' {
	gen byte `depvar'_hr0 = `depvar' * ( hour == 0 | hour == 24 )
	forvalues h = 1(1)23 {
		gen byte `depvar'_hr`h' = `depvar' * ( hour == `h' )
	}
	forvalues d = 1(1)7 {
		gen byte `depvar'_d`d' = `depvar' * ( day_week == `d' )
	}
}
// Generate fatalities that match up to company weekend period and weekend nights
local depvars "fatals fatals_singveh fatals_multiveh drunkfatals drunkfatals_singveh drunkfatals_multiveh deadpeds drunkdeadpeds deadunder45 deadover44 drunk_under45 drunk_over44"
foreach depvar of varlist `depvars' {
	gen byte `depvar'_wknd = `depvar' * ( ( hour > 19 & day_week == 6 ) | day_week == 7 | (hour < 5 & day_week == 1) )
	gen byte `depvar'_wknd_night = `depvar' * ( ( hour > 19 & day_week == 6 ) | ( hour < 5 & day_week == 7 ) ///
		|  ( hour > 19 & day_week == 7 ) | (hour < 5 & day_week == 1) )
}
// Generate fatalities by tract by hour of day by highway
forvalues hwy = 0(1)1 {
	gen byte fatals_hr0_hwy`hwy' = fatals * ( hour == 0 | hour == 24 ) * ( highway == `hwy' )
	gen byte drunkfatals_hr0_hwy`hwy' = drunkfatals * ( hour == 0 | hour == 24 ) * ( highway == `hwy' )
	forvalues h = 1(1)23 {
		gen byte fatals_hr`h'_hwy`hwy' = fatals * ( hour == `h' ) * ( highway == `hwy' )
		gen byte drunkfatals_hr`h'_hwy`hwy' = drunkfatals * ( hour == `h' ) * ( highway == `hwy' )
	}
}
// Generate interactions with highway variable
forvalues hwy = 0(1)1 {
	gen byte fatals_wknd_hwy`hwy' = fatals * ( ( hour > 19 & day_week == 6 ) | day_week == 7 | (hour < 5 & day_week == 1) ) * ( highway == `hwy' )
	gen byte drunkfatals_wknd_hwy`hwy' = drunkfatals * ( ( hour > 19 & day_week == 6 ) | day_week == 7 | (hour < 5 & day_week == 1) ) * ( highway == `hwy' )
	gen byte fatals_wknd_night_hwy`hwy' = fatals * ( ( hour > 19 & day_week == 6 ) | ( hour < 5 & day_week == 7 ) ///
		|  ( hour > 19 & day_week == 7 ) | (hour < 5 & day_week == 1) ) * ( highway == `hwy' )
	gen byte drunkfatals_wknd_night_hwy`hwy' = drunkfatals * ( ( hour > 19 & day_week == 6 ) | ( hour < 5 & day_week == 7 ) ///
		|  ( hour > 19 & day_week == 7 ) | (hour < 5 & day_week == 1) ) * ( highway == `hwy' )
}

tempfile scratchfile fatalsfile
compress
save "`scratchfile'"

// Separate fatalities and drunk fatalities into two separate data sets to conserve memory for the fill operation

// Program to perform the collapse and fill
cap program drop collapse_fill_data
program define collapse_fill_data
syntax varlist, using(string) [outfile(string)]
	collapse (sum) `varlist', by( tract_id_merge year month )
	compress `varlist'
	foreach depvar of varlist `1' {
		label var `depvar'_d1 "Sunday"
		label var `depvar'_wknd "Fri 8 pm to Sun 4 am"
		label var `depvar'_wknd_night "Weekend nights 8 pm to 4 am"
	}
	// Fill in missing tract-by-year-by-month observations
	sum year, meanonly
	assert r(min) == 2001
	gen int time = ( year - 2001 ) * 12 + month
	tsset tract_id_merge time
	tsfill, full
	foreach depvar of varlist `varlist' {
		replace `depvar' = 0 if `depvar' == .
	}
	replace year = floor( ( time - 0.5 ) / 12 ) + 2001 if year == .
	replace month = time - 12 * ( year - 2001 ) if month == .
	bysort time: sum month year
	rename tract_id_merge tract_id
	keep tract_id month year `varlist'
	// Merge with tract information for completeness
	sort tract_id
	merge m:1 tract_id using "`using'"
	assert _merge == 2 | _merge == 3
	drop _merge
	compress `varlist'
	sort tract_id year month
	if ( "`outfile'" != "" ) {
		save "`outfile'"	
	}
end

// Collapse to tract by month-of-sample for all-cause fatalities variables
collapse_fill_data fatals fatals_* deadpeds deadpeds_* deadunder45  deadunder45_* deadover44 deadover44_*, using(`temp_tracts') outfile(`fatalsfile')

// Collapse to tract by month-of-sample for drunk fatalities variables
use "`scratchfile'"
collapse_fill_data drunkfatals drunkfatals_* drunk_under45 drunk_over44 drunk_under45_hr* drunk_over44_hr* ///
	drunk_under45_d* drunk_over44_d* drunk_under45_wknd* drunk_over44_wknd* drunkdeadpeds drunkdeadpeds_*, using(`temp_tracts')

merge 1:1 tract_id year month using "`fatalsfile'"
assert _merge == 3
drop _merge
compress fatals* drunk* deadpeds*

* Save for merging with company data
save "`root'/stata/FARS/intermediate/FARS_tracts.dta", replace

// Import the company data
// Must have encrypted sparsebundle mounted to run; otherwise set flag below to 0 and open company data manually
if 1 {
	import delimited "/Volumes/Uber data read-only/uc_berkeley_trip_index/trips_final_2.csv", clear
}
else {
	exit
}

*---------------------------------------------------------------*
*------CLEAN COMPANY DATA AND CREATE SUMMARY STATS--------------*
*---------------------------------------------------------------*
// Before running this part of code, open up the STATA file with company data
sum
drop v1 // Some kind of internal ID?
format tract_id %11.0f

// Merge in tract lat-lon and population data
merge m:1 tract_id using "`temp_tracts'"
tab _merge
assert _merge != 1
drop if _merge == 2
drop _merge	

// Fix the date variables
rename month string_date
gen date = date( string_date, "YMD" )
gen int year = year( date )
gen byte month = month( date )

// Look at which areas have "high" trip volumes in June 2016
// Corresponds closely with what we know about company
// browse if year == 2016 & month == 6 & trip_index_ex_airport > 0.2

// The index variable is the number of trips leaving that census tract during that month
// The index excludes trips leaving for the aiport
// For confidentiality, the index is normalized where one corresponds to a particular tract in SF in May 2015
// For observations with index below .0005 (46% of all observations), they've set to -1
rename trip_index_ex_airport index

// The index_round_flag variable is redundant, and we drop it
// All that variable does is tell us which observations have indices set to -1
assert index == -1 & weekend_trip_pct_ex_airport == -1 if trip_index_round_flag == 1
drop trip_index_round_flag

// Clean out 10 tract-month-year combinations that have two observations each (drop the 0 activity versions)
sort tract_id year month
drop if ( ( tract_id == tract_id[_n-1] & year == year[_n-1] & month == month[_n-1] ) | ///
	( tract_id == tract_id[_n+1] & year == year[_n+1] & month == month[_n+1] ) ) & index == -1

// We are aggregating up to total trips in a given geographic area
// Consequently, we replace all the -1's with zeros
// This is not exactly right, but should be very close
replace index = 0 if index == -1

// Generate weekend trip index
assert index == 0 if weekend_trip_pct_ex_airport == -1
gen index_wknd = index * weekend_trip_pct_ex_airport / 100

// Create Census tract weighted index variable
tempvar x_sample time
// Generate lat/lon variables that represent approximately 1 mile (longitude is a bit shorter than that near Canada and somewhat longer than that near Mexico)
gen float lat_miles = lat / 0.017
gen float lon_miles = lon / 0.017
gen byte `x_sample' = 0
foreach order in "" "_09" "_11" {
	gen float index_tract_weighted`order' = .
	gen float index_wknd_tract_weighted`order' = .
}
egen byte `time' = group( year month )
sum `time', meanonly
local endt = r(max)
forvalues t = 1(1)`endt' {
	dis "Computing weights for time period `t'"
	replace `x_sample' = ( index != . & `time' == `t' )
	mata: weight_avg( 10, 1, 1, "`x_sample'", "lat_miles", "lon_miles", "index", "index_tract_weighted")
	mata: weight_avg( 10, 1, 1, "`x_sample'", "lat_miles", "lon_miles", "index_wknd", "index_wknd_tract_weighted")
}
forvalues t = 1(1)`endt' {
	dis "Computing weights for time period `t'"
	replace `x_sample' = ( index != . & `time' == `t' )
	mata: weight_avg( 10, 1, 0.9, "`x_sample'", "lat_miles", "lon_miles", "index", "index_tract_weighted_09")
	mata: weight_avg( 10, 1, 0.9, "`x_sample'", "lat_miles", "lon_miles", "index_wknd", "index_wknd_tract_weighted_09")
}
forvalues t = 1(1)`endt' {
	dis "Computing weights for time period `t'"
	replace `x_sample' = ( index != . & `time' == `t' )
	mata: weight_avg( 10, 1, 1.1, "`x_sample'", "lat_miles", "lon_miles", "index", "index_tract_weighted_11")
	mata: weight_avg( 10, 1, 1.1, "`x_sample'", "lat_miles", "lon_miles", "index_wknd", "index_wknd_tract_weighted_11")
}
drop `x_sample' `time'

// Examine the distribution of the main index variable
// Big right skew reflecting a small number of tracts with lots of rides
histogram index if index > .02

// Worth noting that the number of observations increases each year
// E.g. there are 10x as many observations 2016 as in 2012
// I think this means they gave us data only from markets where company is operating
// Similar to the -1 coding above, I don't think this is a problem
tab year

// The number of trips increases 30-fold between 2013 and 2016
// For 2012 we have only half the year, so that total is necessarily much smaller
total index if year == 2012
total index if year == 2013
total index if year == 2014
total index if year == 2015
total index if year == 2016

*----------------------------------------------------*
*--------------MERGE WITH FARS DATA------------------*
*----------------------------------------------------*

// Large number of county by month-of-sample observations where the company isn't operating (_merge == 2)
// We keep these (_merge == 2) observations and simply infer that company activity is zero
// But, fortunately, no observations in the company data that aren't in FARS (_merge == 1)
merge 1:1 tract_id year month using "`root'/stata/FARS/intermediate/FARS_tracts.dta"

rename county countyname
// Extract state and county codes from tract_id variable
// There are seven states with 1-digit state codes
gen byte state = floor( tract_id * 10 ^ -9 )
gen int county = floor( ( tract_id - state * 10 ^ 9 ) * 10 ^ -6 )
destring, replace
// Per Trump, PR is not part of the US
drop if state == 72
assert state >= 1 & state <= 56
assert county >= 1 & county <= 1000
sum county, meanonly
assert r(max) > 100
// Drop redundant state and county ID variables
corr statefp state
assert r(rho) > 0.9999
corr countyfp county
assert r(rho) > 0.9999
drop statefp countyfp
compress
tab _merge
drop _merge

// Populate the state and county names from company data
sort state county
preserve
tempfile names
keep if state_ab !="" & countyname != "" 
collapse (first) state_ab countyname, by(state county)
compress
sort state county
save "`names'"
restore
drop state_ab countyname
merge m:1 state county using "`names'"
tab _merge
assert _merge == 1 | _merge == 3
drop _merge

// Merge in CBSAs
sort state county
preserve
tempfile cbsas
use "`root'/stata/FARS/intermediate/CBSA.dta", clear
keep CBSAcode CBSA state county
compress
save "`cbsas'"
restore
merge m:1 state county using "`cbsas'"
drop if state == 72
tab _merge
assert _merge == 1 | _merge == 3
drop _merge

// Turn off parallel option to reduce dependencies
// local parallel "parallel(6)"
local parallel ""

*--------------------------------------------------------------*
*-----------LOOK AT DATA FROM MOST ACTIVE COUNTIES-------------*
*--------------------------------------------------------------*

// Important Step -- Set Index to Zero for all data not in company data
// This is definitely incorrect for Seattle and NYC
// Otherwise is probably a very good assumption, unless there is some problem with company data
replace index = 0 if index == .
foreach order in "" "_09" "_11" {
	replace index_tract_weighted`order' = 0 if index_tract_weighted`order' == .
	replace index_wknd_tract_weighted`order' = 0 if index_wknd_tract_weighted`order' == .
	label var index_wknd_tract_weighted`order' "Fri 8pm to Sun 4am"
}

// Drop 2001 to 2006 data and 2017 data
// We don't have company data for 2017
// We don't currently use pre-2007 data, and memory demands are very high
cap drop date // Lots of missing values
drop if year <= 2000
drop if year == 2017

// Generate unique county ID and statistics 
egen countyid = group( state county )
bysort countyid: egen meanfatals = mean( drunkfatals ) if year <= 2005
by countyid: egen weightaccidents = max( meanfatals )
bysort tract_id: egen weightpop = mean( tract_population )
by tract_id: egen max_wgt_index = max( index_tract_weighted )

// Drop out early part of sample long before company was operating
// drop if year < 2012

// Generate variables for potential FEs
egen int yearmonth = group( year month )
egen int countybymonth = group( county month )
egen long cbsatime = group( CBSAcode year month )

// Create dependent variable
gen byte ifatals = fatals > 0
gen byte idrunkfatals = drunkfatals > 0
gen byte idrunkfatals_singveh = drunkfatals_singveh > 0
gen byte idrunkfatals_multiveh = drunkfatals_multiveh > 0
gen byte idrunkdeadpeds = drunkdeadpeds > 0
gen byte ifatals_singveh = fatals_singveh > 0
gen byte ifatals_multiveh = fatals_multiveh > 0
gen byte ideadpeds = deadpeds > 0
gen byte ideadunder45 = deadunder45 > 0
gen byte ideadover44 = deadover44 > 0
gen byte idrunk_under45 = drunk_under45 > 0
gen byte idrunk_over44 = drunk_over44 > 0
foreach depvar of varlist fatals drunkfatals {
	gen byte i`depvar'_20_to_6 = ( `depvar'_hr20 + `depvar'_hr21 + `depvar'_hr22 + `depvar'_hr23 ///
		+ `depvar'_hr0 + `depvar'_hr1 + `depvar'_hr2 + `depvar'_hr3 + `depvar'_hr4 + `depvar'_hr5 ) > 0
	gen byte i`depvar'_8_to_17 = ( `depvar'_hr8 + `depvar'_hr9 + `depvar'_hr10 + `depvar'_hr11 ///
		+ `depvar'_hr12 + `depvar'_hr13 + `depvar'_hr14 + `depvar'_hr15 + `depvar'_hr16 ) > 0
	gen byte i`depvar'_wknd = `depvar'_wknd > 0
	gen byte i`depvar'_wknd_night = `depvar'_wknd_night > 0
}
foreach depvar of varlist drunkfatals_singveh drunkfatals_multiveh drunkdeadpeds drunk_under45 drunk_over44 deadunder45 deadover44 {
	gen byte i`depvar'_wknd = `depvar'_wknd > 0
	gen byte i`depvar'_wknd_night = `depvar'_wknd_night > 0
}

*-------------------------------------------*
*----------FE REGRESSIONS, INDEX------------*
*-------------------------------------------*

local start_year = 2012

// Calculate Census tract percentiles by max index
centile max_wgt_index, centile(50 75 85 90)
local c_50 = round( r(c_1), 0.001)
local c_75 = round( r(c_2), 0.001)
local c_85 = round( r(c_3), 0.001)
local c_90 = round( r(c_4), 0.001)
local nonzero = 0.001

// Label dep vars
foreach lhsvar of varlist idrunkfatals idrunkfatals_20_to_6 idrunkfatals_wknd idrunkfatals_wknd_night idrunkfatals_8_to_17 {
	label var `lhsvar' "Drunk death"
}
foreach lhsvar of varlist idrunkfatals_8_to_17 {
	label var `lhsvar' "Drunk daytime death"
}

foreach lhsvar of varlist ifatals ifatals ifatals_20_to_6 ifatals_wknd ifatals_wknd_night ifatals_8_to_17 {
	label var `lhsvar' "Any death"
}
foreach lhsvar of varlist ifatals_8_to_17 {
	label var `lhsvar' "Any daytime death"
}

label var idrunkfatals_singveh "1-vehicle drunk death"
label var idrunkfatals_multiveh "Multi-vehicle drunk death"
label var idrunkdeadpeds "Pedestrian drunk death"
label var ifatals_singveh "1-vehicle death"
label var ifatals_multiveh "Multi-vehicle death"
label var ideadpeds "Pedestrian death"

// Tabulate summary stats for manual export to Table A1

capture: drop col_?
local counter = 0
forvalues i = 1(1)4 {
	if ( `i' == 1 ) {
		gen str10 col_`i' = ""	
	}
	else {
		gen float col_`i' = .	
	}
}

quietly reg year if year >= `start_year', vce(cluster tract_id)
local tracts_n = e(N_clust)
replace col_1 = "Census tracts" in `++counter'
replace col_2 = `tracts_n' in `counter'
quietly reg year if year >= `start_year' & max_wgt_index > `nonzero' & CBSAcode != ., vce(cluster tract_id)
local tracts_n = e(N_clust)
replace col_4 = `tracts_n' in `counter'

sum year if year >= `start_year', meanonly
replace col_1 = "Tract-month observations" in `++counter'
replace col_2 = r(N) in `counter'
sum year if year >= `start_year' & max_wgt_index > `nonzero' & CBSAcode != ., meanonly
replace col_4 = r(N) in `counter++'

foreach varmod in null drunk {
	if ( "`varmod'" == "null" ) {
		local varmod ""
	}
	local sumvar `varmod'fatals
	sum `sumvar' if year >= `start_year'
	replace col_1 = "Annual fatalities" in `++counter'
	replace col_2 = r(sum) / 5 in `counter'
	sum `sumvar' if year >= `start_year' &  max_wgt_index > `nonzero' & CBSAcode != .
	replace col_4 = r(sum) / 5 in `counter'

	local sumvar i`varmod'fatals
	sum `sumvar' if year >= `start_year'
	replace col_1 = "Probability of fatal crash (tract-month)" in `++counter'
	replace col_2 = r(mean) in `counter'
	sum `sumvar' if year >= `start_year' &  max_wgt_index > `nonzero' & CBSAcode != .
	replace col_4 = r(mean) in `counter'

	local sumvar `varmod'fatals_singveh
	sum `sumvar' if year >= `start_year'
	replace col_1 = "Annual single-vehicle fatalities" in `++counter'
	replace col_2 = r(sum) / 5 in `counter'
	sum `sumvar' if year >= `start_year' &  max_wgt_index > `nonzero' & CBSAcode != .
	replace col_4 = r(sum) / 5 in `counter'

	local sumvar `varmod'fatals_multiveh
	sum `sumvar' if year >= `start_year'
	replace col_1 = "Annual multi-vehicle fatalities" in `++counter'
	replace col_2 = r(sum) / 5 in `counter'
	sum `sumvar' if year >= `start_year' &  max_wgt_index > `nonzero' & CBSAcode != .
	replace col_4 = r(sum) / 5 in `counter'

	local sumvar `varmod'deadpeds
	sum `sumvar' if year >= `start_year'
	replace col_1 = "Annual pedestrian/cyclist fatalities" in `++counter'
	replace col_2 = r(sum) / 5 in `counter'
	sum `sumvar' if year >= `start_year' &  max_wgt_index > `nonzero' & CBSAcode != .
	replace col_4 = r(sum) / 5 in `counter++'
}

browse col_1-col_4

tempvar rhsvar_gen rhsvar_gen_09 rhsvar_gen_11 lhs_orig rhsvar_entry tract_var tag
gen float `rhsvar_gen' = .
gen float `rhsvar_gen_09' = .
gen float `rhsvar_gen_11' = .
gen float `lhs_orig' = .

// Outreg options
local outregoptions tex(frag) label bdec(3) sdec(3) noaster nonotes nocons nor

capture: drop pval_actual_tab pval_actual pval_placebo_tab pval_placebo
gen str3 pval_actual_tab = ""
gen float pval_actual = .
gen str3 pval_placebo_tab = ""
gen float pval_placebo = .
mata: pval_actual_tab = J( 50, 1, "" )
mata: pval_actual = J( 50, 1, . )
mata: pval_placebo_tab = J( 50, 1, "" )
mata: pval_placebo = J( 50, 1, . )
local pval_actual_count = 1
local pval_placebo_count = 1

// Rescale index to 2019 levels
// Removed the 2019 rescaling factor since it is confidential.
// However the t-statistics should still match up exactly if run against the original data.
local rescaling = 1
sum index_tract_weighted if max_wgt_index > `c_50' & year == 2016 & month > 9 // Avg value in Q4 2016
local rescale_50 = `rescaling' * r(mean) // Rescale factor to 2019
sum index_wknd_tract_weighted if max_wgt_index > `c_50' & year == 2016 & month > 9 // Avg value in Q4 2016
local rescale_50_wknd = `rescaling' * r(mean) // Rescale factor to 2019
foreach r in 09 11 {
	sum index_tract_weighted_`r' if max_wgt_index > `c_50' & year == 2016 & month > 9 // Avg value in Q4 2016
	local rescale_50_`r' = `rescaling' * r(mean) // Rescale factor to 2019
}
sum index_tract_weighted if max_wgt_index > `c_75' & year == 2016 & month > 9 // Avg value in Q4 2016
local rescale_25 = `rescaling' * r(mean) // Rescale factor to 2019

// Tables 1, A2, A3: Tract and Month-of-Sample Fixed Effects

// Column labels
local l1 = "Nonzero"
local l2 = "Top 50 pct"
local l3 = "Top 25 pct"
local l4 = "Top 15 pct"
local l5 = "Top 10 pct"

replace `rhsvar_gen' = index_tract_weighted / `rescale_50'
label var `rhsvar_gen' "Rideshare index"
foreach r in 09 11 {
	replace `rhsvar_gen_`r'' = index_tract_weighted_`r' / `rescale_50_`r''
	label var `rhsvar_gen_`r'' "Rideshare index"
}

local counter = 0
foreach rhsvar of varlist `rhsvar_gen' `rhsvar_gen_09' `rhsvar_gen_11' {
	local suffix ""
	if "`rhsvar'" == "`rhsvar_gen'" {
		local table "1"
	}
	if "`rhsvar'" == "`rhsvar_gen_09'" {
		local table "A2"
	}
	if "`rhsvar'" == "`rhsvar_gen_11'" {
		local table "A3"
	}
	foreach lhsvar of varlist idrunkfatals ifatals {
		quietly replace `lhs_orig' = `lhsvar'
		quietly replace `lhsvar' = 100 * `lhsvar'
		local i = 1
		foreach cutoff of numlist `nonzero' `c_50' `c_75' `c_85' `c_90' {
			reghdfe `lhsvar' `rhsvar' if max_wgt_index > `cutoff' & year >= `start_year', ///
				absorb(tract_id yearmonth) cluster(CBSAcode) `parallel'
			quietly egen `tag' = tag( tract_id ) if e(sample)
			count if `tag'
			local tracts_n = r(N)
			quietly drop `tag'
			quietly sum `lhsvar' if e(sample)
			local meany = r(mean)
			quietly sum `rhsvar' if e(sample)
			local meanx = r(mean)
			if ( "`suffix'" == "_all" ) {
				mata: pval_actual_tab[`pval_actual_count'] = "`table'"
				local b = _b[`rhsvar']
				local se = _se[`rhsvar']
				mata: pval_actual[`pval_actual_count++'] = 2 * ttail( 48, abs( `b' ) / `se' )
			}
			if ( !`counter' ) {
				if ( `i' == 1 ) {
					outreg2 using "`root'/paper/tables/RegsIndex`suffix'", keep(`rhsvar') `outregoptions' ctitle("") replace ///
						addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx') ///
						addtext(Max tract rideshare activity, `l`i++'')
				}
				else {
					outreg2 using "`root'/paper/tables/RegsIndex`suffix'", keep(`rhsvar') `outregoptions' ctitle("") ///
						addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx') ///
						addtext(Max tract rideshare activity, `l`i++'')
				}
			}
			else {
				if ( `i' == 1 ) {
					outreg2 using "`root'/paper/tables/RegsIndex`suffix'_poly`counter'", keep(`rhsvar') `outregoptions' ctitle("") replace ///
						addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx') ///
						addtext(Max tract rideshare activity, `l`i++'')
				}
				else {
					outreg2 using "`root'/paper/tables/RegsIndex`suffix'_poly`counter'", keep(`rhsvar') `outregoptions' ctitle("") ///
						addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx') ///
						addtext(Max tract rideshare activity, `l`i++'')
				}
			}
		}
		quietly replace `lhsvar' = `lhs_orig'
		local suffix "_all"
	}
	if ( !`counter' ) {
		local counter = `counter' + 9
	}
	else {
		local counter = `counter' + 2
	}
}

// Auxiliary analysis: Table 1 "Top 25 pct" column with 25% rescaling
replace `rhsvar_gen' = index_tract_weighted / `rescale_25'
reghdfe ifatals `rhsvar_gen' if max_wgt_index > `c_75' & year >= `start_year', absorb(tract_id yearmonth) cluster(CBSAcode) `parallel'
local beta = round( _b[`rhsvar_gen'] * 100, .001)
dis "********************************"
dis "Top 25 pct coefficient is `beta'"
dis "********************************"
replace `rhsvar_gen' = index_tract_weighted / `rescale_50'

// Auxiliary analysis: Fully interacted version of Table 1 "Top 50 pct" regression

// Flag to skip the (time consuming) regression that estimates a separate effect for each tract
local skip_interacted_regression = 1

if !`skip_interacted_regression' {
	preserve
	keep if year > 2011 & max_wgt_index > `c_50'

	// Remove aggregate time trends from RHS and LHS variables
	reg index_tract_weighted i.yearmonth
	predict index_tract_weighted_r, resid
	reg ifatals i.yearmonth
	predict ifatals_r, resid

	egen long tractid = group( tract_id )
	sum tractid, meanonly
	local maxtract = r(max)
	gen float coefficient = .
	sort tractid year month

	// Run 36,000+ tract-level regressions and save results
	forvalues i = 1(1)`maxtract' {
		local starter = 1 + 60 * ( `i' - 1 )
		local ender = `starter' + 59
		qui reg ifatals_r index_tract_weighted_r in `starter'/`ender'
		qui replace coefficient = _b[index_tract_weighted_r] in `i'
	}

	sum coefficient
	local ate = r(mean) * `rescale_50'
	dis "Average coefficient is `ate'"
	// Store result in Mata vector "betas" for later reference if needed
 	mata: betas = st_data((1,`maxtract'),"coefficient")
	restore
}

// Table 2: Tract and State-by-Month Fixed Effects

// Column labels
local l11 = "Top 50 pct"
local l12 = "Top 25 pct"
local l21 = "All"
local l22 = "8p-6a"

local l11 = "Nonzero"
local l12 = "Top 50 pct"
local l13 = "Top 25 pct"
local l14 = "Top 15 pct"
local l15 = "Top 10 pct"

cap drop statemonth
egen int statemonth = group( state yearmonth )
replace `rhsvar_gen' = index_tract_weighted / `rescale_50'

local drunk_list "idrunkfatals"
local all_list "ifatals"
local suffix "_all"
local table "2"
foreach list in all_list {
	local i = 1
	foreach cutoff of numlist `nonzero' `c_50' `c_75' `c_85' `c_90' {
		local j = 1
		foreach lhsvar of varlist ``list'' {
			quietly replace `lhs_orig' = `lhsvar'
			quietly replace `lhsvar' = 100 * `lhsvar'
			reghdfe `lhsvar' `rhsvar_gen' if max_wgt_index > `cutoff' & year >= `start_year', ///
				absorb(tract_id statemonth) cluster(CBSAcode) `parallel'
			quietly egen `tag' = tag( tract_id ) if e(sample)
			count if `tag'
			local tracts_n = r(N)
			quietly drop `tag'
			quietly sum `lhsvar' if e(sample)
			local meany = r(mean)
			quietly sum `rhsvar_gen' if e(sample)
			local meanx = r(mean)
			mata: pval_actual_tab[`pval_actual_count'] = "`table'"
			local b = _b[`rhsvar_gen']
			local se = _se[`rhsvar_gen']
			mata: pval_actual[`pval_actual_count++'] = 2 * ttail( 48, abs( `b' ) / `se' )
			if ( `i' + `j' == 2 ) {
				outreg2 using "`root'/paper/tables/RegsIndex_Statemonth`suffix'", keep(`rhsvar_gen') `outregoptions' ctitle("") replace ///
					addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx') ///
					addtext(Max tract rideshare activity, `l1`i'')
			}
			else {
				outreg2 using "`root'/paper/tables/RegsIndex_Statemonth`suffix'", keep(`rhsvar_gen') `outregoptions' ctitle("") ///
					addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx') ///
					addtext(Max tract rideshare activity, `l1`i'')
			}
			quietly replace `lhsvar' = `lhs_orig'
		}
		local `i++'
	}
}

// Table 3A: Tract and Month-of-Sample Fixed Effects -- Daytime All

// Column labels
local l11 = "Top 50 pct"
local l12 = "Top 25 pct"
// local l13 = "Nonzero"
local l21 = "All"
// local l22 = "FrSa 8p-4a"
local table "3a"

local suffix "_all"
foreach lhsvar of varlist ifatals_8_to_17 {
	quietly replace `lhs_orig' = `lhsvar'
	quietly replace `lhsvar' = 100 * `lhsvar'
	local i = 1
	foreach cutoff of numlist `c_50' `c_75' {
		local j = 1
		foreach rhsvar of varlist index_tract_weighted {
			tempvar sample`i'`j'
			if ( "`rhsvar'" == "index_tract_weighted" ) {
				quietly replace `rhsvar_gen' = `rhsvar' / `rescale_50'
			}
			else {
				quietly replace `rhsvar_gen' = `rhsvar' / `rescale_50_wknd'
			}
			reghdfe `lhsvar' `rhsvar_gen' if max_wgt_index > `cutoff' & year >= `start_year', ///
				absorb(tract_id statemonth) cluster(CBSAcode) `parallel'
			quietly egen `tag' = tag( tract_id ) if e(sample)
			count if `tag'
			local tracts_n = r(N)
			quietly drop `tag'
			quietly gen byte `sample`i'`j'' = e(sample)
			quietly sum `lhsvar' if e(sample)
			local meany = r(mean)
			quietly sum `rhsvar_gen' if e(sample)
			local meanx = r(mean)
			mata: pval_placebo_tab[`pval_placebo_count'] = "`table'"
			local b = _b[`rhsvar_gen']
			local se = _se[`rhsvar_gen']
			mata: pval_placebo[`pval_placebo_count++'] = 2 * ttail( 48, abs( `b' ) / `se' )
			if ( `i' + `j' == 2 ) {
				outreg2 using "`root'/paper/tables/RegsIndex_Day`suffix'", keep(`rhsvar_gen') `outregoptions' ctitle("") replace ///
					addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx') ///
					addtext(Max tract rideshare activity, `l1`i'')
			}
			else {
				outreg2 using "`root'/paper/tables/RegsIndex_Day`suffix'", keep(`rhsvar_gen') `outregoptions' ctitle("") ///
					addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx') ///
					addtext(Max tract rideshare activity, `l1`i'')
			}
		}
		local `i++'
	}
	quietly replace `lhsvar' = `lhs_orig'
}

// Table 3B: Tract and Month-of-Sample Fixed Effects -- Daytime Triple Diffs

// Column labels
local l11 = "Top 50 pct"
local l12 = "Top 25 pct"
// local l13 = "Nonzero"
local l21 = "All"
// local l22 = "FrSa 8p-4a"
local table "3b"

local suffix "_all"
capture: drop tract_month
egen long tract_month = group( tract_id yearmonth )
tempvar exp_id lhsvar_td rhsvar_gen_td
quietly gen float `rhsvar_gen_td' = .
foreach lhsvar of varlist ifatals {
	quietly gen `lhsvar_td' = 100 * `lhsvar'_8_to_17
	local i = 1
	foreach cutoff of numlist `c_50' `c_75' {
		local j = 1
		foreach rhsvar of varlist index_tract_weighted {
			sum `sample`i'`j''
			expand 2 if `sample`i'`j'', gen(`exp_id')
			capture: drop tract_daytime yearmonth_daytime statemonth_daytime 
			egen long tract_daytime = group( tract_id `exp_id' )
			egen int yearmonth_daytime = group( yearmonth `exp_id' )
			egen int statemonth_daytime = group( statemonth `exp_id' )
			quietly replace `lhsvar_td' = 100 * ( `lhsvar' - `lhsvar'_8_to_17 ) if `exp_id' == 1 & `sample`i'`j'' 
			if ( "`rhsvar'" == "index_tract_weighted" ) {
				quietly replace `rhsvar_gen_td' = ( `exp_id' == 1 ) * `rhsvar' / `rescale_50'
			}
			else {
				quietly replace `rhsvar_gen_td' = ( `exp_id' == 1 ) * `rhsvar' / `rescale_50_wknd'
			}
			label var `rhsvar_gen_td' "Rideshare index * Night"
			if ( "`lhsvar'" == "idrunkfatals" ) {
				label var `lhsvar_td' "Drunk death"
			}
			else {
				label var `lhsvar_td' "Any death"
			}
			reghdfe `lhsvar_td' `rhsvar_gen_td' if max_wgt_index > `cutoff' & year >= `start_year', ///
				absorb(tract_id tract_month tract_daytime yearmonth_daytime statemonth_daytime) cluster(CBSAcode) `parallel'
			quietly egen `tag' = tag( tract_id ) if e(sample)
			count if `tag'
			local tracts_n = r(N)
			quietly drop `tag'
			quietly sum `lhsvar_td' if e(sample)
			local meany = r(mean)
			quietly sum `rhsvar_gen_td' if e(sample)
			local meanx = r(mean)
			mata: pval_actual_tab[`pval_actual_count'] = "`table'"
			local b = _b[`rhsvar_gen_td']
			local se = _se[`rhsvar_gen_td']
			mata: pval_actual[`pval_actual_count++'] = 2 * ttail( 48, abs( `b' ) / `se' )
			outreg2 using "`root'/paper/tables/RegsIndex_Day`suffix'", keep(`rhsvar_gen_td') `outregoptions' ctitle("") ///
				addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx') ///
				addtext(Max tract rideshare activity, `l1`i'')
			drop if `exp_id' == 1
			drop `exp_id'
		}
		local `i++'
	}
	drop `lhsvar_td'
}

// Table 4A: Drunk Fatalities -- Tract and State-by-Month Fixed Effects

// Column labels
local l11 = "Top 50 pct"
local l12 = "Top 25 pct"
local l21 = "All"
local l22 = "8p-6a"

capture: drop statemonth
egen int statemonth = group( state yearmonth )
replace `rhsvar_gen' = index_tract_weighted / `rescale_50'

local drunk_list "idrunkfatals"
local all_list "ifatals"
local suffix ""
local table "4"
foreach list in drunk_list {
	local i = 1
	foreach cutoff of numlist `c_50' `c_75' {
		local j = 1
		foreach lhsvar of varlist ``list'' {
			quietly replace `lhs_orig' = `lhsvar'
			quietly replace `lhsvar' = 100 * `lhsvar'
			reghdfe `lhsvar' `rhsvar_gen' if max_wgt_index > `cutoff' & year >= `start_year', ///
				absorb(tract_id statemonth) cluster(CBSAcode) `parallel'
			quietly egen `tag' = tag( tract_id ) if e(sample)
			count if `tag'
			local tracts_n = r(N)
			quietly drop `tag'
			quietly sum `lhsvar' if e(sample)
			local meany = r(mean)
			quietly sum `rhsvar_gen' if e(sample)
			local meanx = r(mean)
			mata: pval_actual_tab[`pval_actual_count'] = "`table'"
			local b = _b[`rhsvar_gen']
			local se = _se[`rhsvar_gen']
			mata: pval_actual[`pval_actual_count++'] = 2 * ttail( 48, abs( `b' ) / `se' )
			if ( `i' + `j' == 2 ) {
				outreg2 using "`root'/paper/tables/RegsIndex_StateMonth_Day`suffix'", keep(`rhsvar_gen') `outregoptions' ctitle("") replace ///
					addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx') ///
					addtext(Max tract rideshare activity, `l1`i'')
			}
			else {
				outreg2 using "`root'/paper/tables/RegsIndex_StateMonth_Day`suffix'", keep(`rhsvar_gen') `outregoptions' ctitle("") ///
					addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx') ///
					addtext(Max tract rideshare activity, `l1`i'')
			}
			quietly replace `lhsvar' = `lhs_orig'
		}
		local `i++'
	}
 local suffix "_all"
}


// Table 4B: Drunk fatalities -- Daytime Triple Diffs (Drunk)

// Column labels
local l11 = "Top 50 pct"
local l12 = "Top 25 pct"
// local l13 = "Nonzero"
local l21 = "All"
// local l22 = "FrSa 8p-4a"

local suffix ""
capture: drop tract_month
egen long tract_month = group( tract_id yearmonth )
tempvar exp_id lhsvar_td rhsvar_gen_td
capture: drop `rhsvar_gen_td'
quietly gen float `rhsvar_gen_td' = .
foreach lhsvar of varlist idrunkfatals {
	quietly gen `lhsvar_td' = 100 * `lhsvar'_8_to_17
	local i = 1
	foreach cutoff of numlist `c_50' `c_75' {
		local j = 1
		foreach rhsvar of varlist index_tract_weighted {
			sum `sample`i'`j''
			expand 2 if `sample`i'`j'', gen(`exp_id')
			capture: drop tract_daytime yearmonth_daytime statemonth_daytime 
			egen long tract_daytime = group( tract_id `exp_id' )
			egen int yearmonth_daytime = group( yearmonth `exp_id' )
			egen int statemonth_daytime = group( statemonth `exp_id' )
			quietly replace `lhsvar_td' = 100 * ( `lhsvar' - `lhsvar'_8_to_17 ) if `exp_id' == 1 & `sample`i'`j'' 
			if ( "`rhsvar'" == "index_tract_weighted" ) {
				quietly replace `rhsvar_gen_td' = ( `exp_id' == 1 ) * `rhsvar' / `rescale_50'
			}
			else {
				quietly replace `rhsvar_gen_td' = ( `exp_id' == 1 ) * `rhsvar' / `rescale_50_wknd'
			}
			label var `rhsvar_gen_td' "Rideshare index * Night"
			if ( "`lhsvar'" == "idrunkfatals" ) {
				label var `lhsvar_td' "Drunk death"
			}
			else {
				label var `lhsvar_td' "Any death"
			}
			reghdfe `lhsvar_td' `rhsvar_gen_td' if max_wgt_index > `cutoff' & year >= `start_year', ///
				absorb(tract_id tract_month tract_daytime yearmonth_daytime statemonth_daytime) cluster(CBSAcode) `parallel'
			quietly egen `tag' = tag( tract_id ) if e(sample)
			count if `tag'
			local tracts_n = r(N)
			quietly drop `tag'
			quietly sum `lhsvar_td' if e(sample)
			local meany = r(mean)
			quietly sum `rhsvar_gen_td' if e(sample)
			local meanx = r(mean)
			mata: pval_actual_tab[`pval_actual_count'] = "`table'"
			local b = _b[`rhsvar_gen_td']
			local se = _se[`rhsvar_gen_td']
			mata: pval_actual[`pval_actual_count++'] = 2 * ttail( 48, abs( `b' ) / `se' )
			outreg2 using "`root'/paper/tables/RegsIndex_StateMonth_Day`suffix'", keep(`rhsvar_gen_td') `outregoptions' ctitle("") ///
				addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx') ///
				addtext(Max tract rideshare activity, `l1`i'')
			drop if `exp_id' == 1
			drop `exp_id'
		}
		local `i++'
	}
	drop `lhsvar_td'
}

// Figure A6: Tract and Month-of-Sample Fixed Effects -- Placebo treatments

set seed 220

!date

global UBER_MAIN_DIR "`root'"

cap program drop run_histograms
program define run_histograms
syntax varlist(min = 1 max = 1) [if], varlabel(string) outfile(string) start(real) width(real) addplot(string) ///
	xlabel(string) ylabel(string) [title(string)]
	tempvar plotvar
	quietly gen `plotvar' = `1'
	label var `plotvar' "`varlabel'"
	histogram `plotvar' `if', ///
		start(`start') width(`width') frac title("`title'") addplot(`addplot') legend(off) ///
		graphregion(fcolor(white)) fc(gs10) lc(gs14) lw(vthin) xlabel(`xlabel') ylabel(`ylabel') ///
		xtitle(, size(medlarge)) ytitle(, size(medlarge))
	graph save Graph "$UBER_MAIN_DIR/paper/figs/`outfile'.gph", replace
	graph export "$UBER_MAIN_DIR/paper/figs/`outfile'.eps", preview(off) fontface(Times) replace
	graph export "$UBER_MAIN_DIR/paper/figs/`outfile'.pdf", fontface(Times) replace
end

// Calculate Census tract percentiles by max index
centile max_wgt_index, centile(50 75)
local c_50 = round( r(c_1), 0.001)
local c_75 = round( r(c_2), 0.001)
local start_year = 2012
tempvar rhsvar_gen lhs_orig
gen float `rhsvar_gen' = .
gen float `lhs_orig' = .
sum index_tract_weighted if max_wgt_index > `c_50' & year == 2016 & month > 9 // Avg value in Q4 2016
local rescale_50 = 2 * r(mean) // Rescale factor to 2019

preserve
keep if max_wgt_index > `c_50' & year >= `start_year'
mata: sorter = .
mata: beta = J( 200, 2, . )
mata: se = J( 200, 2, . )
sort tract_id year month
assert yearmonth > yearmonth[_n-1] if tract_id == tract_id[_n-1]
tempvar random random_tract index_placebo yearmonth
gen double `random' = .
replace `rhsvar_gen' = index_tract_weighted / `rescale_50'
gen float `index_placebo' = `rhsvar_gen'
label var `index_placebo' "Placebo rideshare index"
gen int `yearmonth' = yearmonth

sort tract_id year month
local drunk_list "idrunkfatals"
local all_list "ifatals"
local suffix ""
local table "AX"
local c = 1
foreach list in all_list drunk_list {
	foreach cutoff of numlist `c_50' {
//	foreach cutoff of numlist `c_50' `c_75' {
		foreach lhsvar of varlist ``list'' {
			quietly replace `lhs_orig' = `lhsvar'
			quietly replace `lhsvar' = 100 * `lhsvar'
			forvalues r = 1(1)200 {
				replace `random' = runiform()
				by tract_id: egen double `random_tract' = mean( `random' )
				mata: st_view( sorter, ., "`random_tract' `yearmonth' `index_placebo'" )
				mata: sorter[.,.] = sort( sorter, ( 1, 2 ) )
				!date
				reghdfe `lhsvar' `index_placebo' if max_wgt_index > `cutoff' & year >= `start_year', ///
					absorb(tract_id yearmonth) cluster(CBSAcode) `parallel'
				local b = _b[`index_placebo']
				local se = _se[`index_placebo']
				mata: beta[`r',`c'] = `b'
				mata: se[`r',`c'] = `se'
				drop `random_tract'
			}			
			quietly replace `lhsvar' = `lhs_orig'
		local c = `c' + 1
		}
	}
}
!date
restore
mata: st_matrix( "beta", beta )
mata: st_matrix( "se", se )
svmat beta
svmat se
gen t_all = beta1 / se1
gen t_drunk = beta2 / se2

run_histograms beta1, ///
	varlabel("Placebo coefficient") ///
	outfile("placebo-histogram-all") ///
	start(-0.12) width(0.005) ///
	addplot("pci 0 -0.101 0.12 -0.101, lcolor(gs5) lpattern( dash )") ///
	xlabel(-.12(.04).12) ylabel(0(0.04)0.12)

// Figures 1, A4, A5: "Event study"

keep if max_wgt_index > 0.001 & year >= 2012
local breps = 200
local breps1 = `breps' + 1

set seed 320

sum max_wgt_index if max_wgt_index > 0.001 
local sd = round( r(sd), 0.01 )
dis "1 SD is `sd'"

cap: drop separation
gen float separation = .
cap: drop sd_separation
gen float sd_separation = .
cap: drop sd_value
gen float sd_value = .
cap: drop threshold
gen float threshold = .
local start_year = 2012
tempvar lhs_orig
gen float `lhs_orig' = .
tempvar resvar quarter treated event_time
gen byte `quarter' = ceil( month / 3 )
assert `quarter' >= 1 & `quarter' <= 4
bysort year month: sum index_tract_weighted if max_wgt_index > 1 & year >= 2012
sum max_wgt_index if max_wgt_index > 1 & year == 2016 & month == 12
dis "Treatment reaches 10% of eventual growth in Jan 2014 for treated group"

capture: drop year_es treated_*
gen float year_es = .
gen byte `event_time' = .
local t = 1
forvalues y = 2012(1)2016 {
	forvalues q = 1(1)4 {
		quietly replace `event_time' = `t++' if year == `y' & `quarter' == `q'
	}
}
forvalues threshold = 0.7(0.3)1.3 {
	local label = `threshold' * 10
	forvalues t = 0(1)1 {
		gen float treated_`label'_`t' = .
	}
	gen byte `treated' = .
	replace `treated' = 0 * ( max_wgt_index > 0.001 & max_wgt_index <= `threshold' ) + 1 * ( max_wgt_index > `threshold' ) if max_wgt_index != . & max_wgt_index > 0.001
	sum `treated' if `treated' == 1 & year == 2016 & month == 12, meanonly
	local treated_tracts = r(N)
	sum `treated' if `treated' == 0 & year == 2016 & month == 12, meanonly
	local control_tracts = r(N)
	dis "************************************************************"
	dis "`treated_tracts' treated tracts, `control_tracts' control tracts, threshold `threshold'"
	dis "************************************************************"
	local t = 1
	cap: drop event_dummy_*
	forvalues y = 2012(1)2016 {
		forvalues q = 1(1)4 {
			if !( ( `y' == 2012 & `q' == 1 ) ) {
				sum `event_time' if year == `y' & `quarter' == `q', meanonly
				local t = r(mean)
				quietly gen event_dummy_`y'_`q' = `treated' * ( abs( `event_time' - `t++' ) < 0.01 ) if `event_time' != .
			}
		}
	}
	cap: drop es_beta_`label' es_se_`label'
	quietly gen float es_beta_`label' = .
	quietly gen float es_se_`label' = .
	foreach lhsvar of varlist ifatals {
		quietly replace `lhs_orig' = `lhsvar'
		quietly replace `lhsvar' = 100 * `lhsvar'	
		tempfile orig_data
		save `orig_data', replace
		mata: es_raw_mean_treat = J( `breps1', 20, . )
		mata: es_raw_mean_control = J( `breps1', 20, . )
		forvalues b = 1(1)`breps1' {
			use `orig_data', clear
			if ( `b' != 1 ) {
				bsample, cluster( CBSAcode )
			}
			// Demean wrt "ever treated" status and year-month dummies
			quietly reg `lhsvar' i.yearmonth `treated'
			quietly predict `resvar', resid
			local i = 0
			forvalues y = 2012(1)2016 {
				forvalues q = 1(1)4 {
					local `i++'
					forvalues t = 0(1)1 {
						sum `resvar' if year == `y' & `quarter' == `q' & `treated' == `t', meanonly
						local mean = r(mean)
						if ( `t' ) {
							mata: es_raw_mean_treat[ `b', `i' ] = `mean'						
						}
						else {
							mata: es_raw_mean_control[ `b', `i' ] = `mean'						
						}						
						quietly replace treated_`label'_`t' = r(mean) in `i'
					}
				}
			}
			drop `resvar'		
		}
		use `orig_data', clear
		foreach type in treat control {
			mata: es_mean_`type' = J( `breps1', 20, . )
			// Take 3-period moving average (only 2 period MA in first/last periods)
			mata: es_mean_`type'[ ., 1 ] = ( es_raw_mean_`type'[ ., 1 ] + es_raw_mean_`type'[ ., 2 ] ) / 2
			mata: es_mean_`type'[ ., 20 ] = ( es_raw_mean_`type'[ ., 19 ] + es_raw_mean_`type'[ ., 20 ] ) / 2
			forvalues i = 2(1)19 {
				mata: es_mean_`type'[ ., `i' ] = ( es_raw_mean_`type'[ ., `i' - 1 ] + es_raw_mean_`type'[ ., `i' ] + ///
					es_raw_mean_`type'[ ., `i' + 1 ] ) / 3
			}
			mata: es_mean_`type' = es_mean_`type' :- es_mean_`type'[ . , 1 ]
			mata: es_sd_`type' = sqrt( diagonal( variance( es_mean_`type'[ 2..`breps1', . ] ) ) )
			mata: es_beta_`type' = es_mean_`type'[1,.]'
		}
		drop `treated'
		quietly replace `lhsvar' = `lhs_orig'
	}
	getmata es_beta_treat es_sd_treat es_beta_control es_sd_control, force replace
	foreach varname of varlist es_beta_treat es_sd_treat es_beta_control es_sd_control {
		rename `varname' `varname'_`label'
	}
}
capture: drop event_time event_year
gen byte event_time = _n in 1/20
gen float event_year = 2012 + ( event_time - 1 ) / 4
label var event_year "Year"
forvalues threshold = 0.7(0.3)1.3 {
	local label = `threshold' * 10
	capture: drop es_beta_*_`label'_ub es_beta_*_`label'_lb
	foreach type in treat control {
		gen es_beta_`type'_`label'_ub = es_beta_`type'_`label' + 1.96 * es_sd_`type'_`label'
		gen es_beta_`type'_`label'_lb = es_beta_`type'_`label' - 1.96 * es_sd_`type'_`label'
	}
	twoway ( rarea es_beta_treat_`label'_lb es_beta_treat_`label'_ub event_year, color( gs14 )  ) ///
		( rarea es_beta_control_`label'_lb es_beta_control_`label'_ub event_year, color( gs15 ) ) ///
		( line es_beta_treat_`label' event_year, lwidth( medthin ) lcolor( black ) ) ///
		( line es_beta_control_`label' event_year, lwidth( medthin ) lcolor( black ) lpattern( dash ) ), ///
		xlabel( 2012(1)2016 ) xtitle( "Year" ) ///
		graphregion(fcolor(white)) ytitle( "Probability of fatal accident" ) ///
		legend( order( 3 "Treated"  4 "Control" ) region( lstyle( none ) ) )
	graph export "`root'/paper/figs/Event_study_fatalities_`label'.pdf", as(pdf) replace
}

// Table A4: Results by crash type

replace `rhsvar_gen' = index_tract_weighted / `rescale_50'
label var `rhsvar_gen' "Rideshare index"
local table "A4"

local suffix "all"
local i = 1
foreach lhsvar of varlist ifatals_singveh ifatals_multiveh ideadpeds {
	quietly replace `lhs_orig' = `lhsvar'
	quietly replace `lhsvar' = 100 * `lhsvar'
	local cutoff = `c_50'
	reghdfe `lhsvar' `rhsvar_gen' if max_wgt_index > `cutoff' & year >= `start_year', ///
		absorb(tract_id yearmonth) cluster(CBSAcode) `parallel'
	quietly egen `tag' = tag( tract_id ) if e(sample)
	count if `tag'
	local tracts_n = r(N)
	quietly drop `tag'
	quietly sum `lhsvar' if e(sample)
	local meany = r(mean)
	quietly sum `rhsvar_gen' if e(sample)
	local meanx = r(mean)
	mata: pval_actual_tab[`pval_actual_count'] = "`table'"
	local b = _b[`rhsvar_gen']
	local se = _se[`rhsvar_gen']
	mata: pval_actual[`pval_actual_count++'] = 2 * ttail( 48, abs( `b' ) / `se' )
	if ( `i++' == 1 ) {
		outreg2 using "`root'/paper/tables/RegsIndexType`suffix'", keep(`rhsvar_gen') `outregoptions' ctitle("") replace ///
			addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx')
	}
	else {
		outreg2 using "`root'/paper/tables/RegsIndexType`suffix'", keep(`rhsvar_gen') `outregoptions' ctitle("") ///
			addstat(Tracts, `tracts_n', Mean dependent variable, `meany', Mean index, `meanx')
	}
	quietly replace `lhsvar' = `lhs_orig'
}
local suffix "_all"

mata: st_sview( p_a_t = "", ., "pval_actual_tab")
mata: st_view( p_a = ., ., "pval_actual")
mata: st_sview( p_p_t = "", ., "pval_placebo_tab")
mata: st_view( p_p = ., ., "pval_placebo")
mata: p_a_t[1..50] = pval_actual_tab
mata: p_a[1..50] = pval_actual
mata: p_p_t[1..50] = pval_placebo_tab
mata: p_p[1..50] = pval_placebo

// Report p-values from analyses for FDR adjustment (use FDR adjusment code from Michael Anderson's website)
browse pval_* in 1/50

*---------------------------------------------------------------*
*-----------------FATAL ACCIDENTS BY HOUR OF DAY----------------*
*---------------------------------------------------------------*

cd "`root'/stata/FARS"
run "`root'/stata/FARS/gen completeFARS.do"

// Range plot for entire U.S. sample
use completeFARS.dta, clear 
keep if year >= 2012
drop if hour == 99
replace hour = 24 if hour == 0
collapse (sum) fatals_alcohol fatals_nonalcohol, by(year hour)
collapse (mean) fatals_alcohol fatals_nonalcohol, by(hour)
gen y1 = 0
gen y2 = fatals_alcohol
gen y3 = fatals_alcohol + fatals_nonalcohol
gen hourstart8 = hour - 7
replace hourstart8 = hourstart8 + 24 if hourstart8 <= 0
sort hourstart8
twoway rarea y1 y2 hourstart8, color(dkorange) || rarea y2 y3 hourstart8, color(dkorange*0.4) ///
       xlabel(1 "8am" 5 "noon" 9 "4pm" 13 "8pm" 17 "Midnight" 21 "4am" 24 "7am", labsize(small)) ///
	   ylabel(, labsize(small) nogrid) aspectratio(0.5) ///
	   ytitle("Average annual fatalities by hour-of-day", size(small)) xtitle("", size(small)) ///
	   plotregion(style(none)) graphregion(fcolor(white)) legend(off) ///
	   text(300 16 "Alcohol-related fatalities", size(small) color(black)) ///
	   text(1200 13 "All fatalities", size(small) color(black))
graph export "`root'/paper/figs/FatalitiesByHour.pdf", replace 

// Range plot for U.S. cities with 100,000+ population
use completeFARS.dta, clear
keep if _merge == 3
keep if year >= 2010
drop if hour == 99
replace hour = 24 if hour == 0
collapse (sum) fatals_alcohol fatals_nonalcohol, by(year hour)
collapse (mean) fatals_alcohol fatals_nonalcohol, by(hour)
gen y1 = 0
gen y2 = fatals_alcohol
gen y3 = fatals_alcohol + fatals_nonalcohol
gen hourstart8 = hour - 7
	replace hourstart8 = hourstart8 + 24 if hourstart8 <= 0
	sort hourstart8
twoway rarea y1 y2 hourstart8, color(dkorange) || rarea y2 y3 hourstart8, color(dkorange*0.4) ///
       xlabel(1 "8am" 5 "noon" 9 "4pm" 13 "8pm" 17 "Midnight" 21 "4am" 24 "7am", labsize(small)) ///
	   ylabel(, labsize(small) nogrid) aspectratio(0.5) ///
	   ytitle("Average annual fatalities by hour-of-day", size(small)) xtitle("", size(small)) ///
	   plotregion(style(none)) graphregion(fcolor(white)) legend(off) ///
	   text(50 17 "Alcohol-related fatalities", size(small) color(black)) ///
	   text(250 14 "All fatalities", size(small) color(black))
graph export "`root'/paper/figs/FatalitiesByHourCities.pdf", replace 

// Range plot for U.S. cities with 100,000+ population normalized by CA VMT
tempvar sorted
use completeFARS.dta, clear
keep if _merge == 3
keep if year >= 2012
drop if hour == 99
replace hour = 24 if hour == 0
collapse (sum) fatals_alcohol fatals_nonalcohol, by(year hour)
collapse (mean) fatals_alcohol fatals_nonalcohol, by(hour)
sort hour
save `sorted', replace
insheet using "`root'/raw data/other/pems_output-VMT-hour.csv", clear
keep hour mean
rename mean mean_vmt
merge 1:1 hour using `sorted'
assert _merge == 3
drop _merge
destring mean_vmt, replace ignore(",")
gen y1 = 0
gen y2 = 10 ^ 6 * fatals_alcohol / mean_vmt
gen y3 = 10 ^ 6 * ( fatals_alcohol + fatals_nonalcohol ) / mean_vmt
gen hourstart8 = hour - 7
	replace hourstart8 = hourstart8 + 24 if hourstart8 <= 0
	sort hourstart8
twoway rarea y1 y2 hourstart8, color(dkorange) || rarea y2 y3 hourstart8, color(dkorange*0.4) ///
       xlabel(1 "8am" 5 "noon" 9 "4pm" 13 "8pm" 17 "Midnight" 21 "4am" 24 "7am", labsize(small)) ///
	   ylabel(, labsize(small) nogrid) aspectratio(0.5) ///
	   ytitle("VMT-normalized annual fatalities by hour-of-day", size(small)) xtitle("", size(small)) ///
	   plotregion(style(none)) graphregion(fcolor(white)) legend(off) ///
	   text(10 18 "Alcohol-related fatalities", size(small) color(black)) ///
	   text(95 19 "All fatalities", size(small) color(black))
graph export "`root'/paper/figs/FatalitiesByVMTHourCities.pdf", replace
